package org.shirdrn.document.preprocessing.api.constants;

/**
 * Configuration property keys for the document-preprocessing pipeline.
 *
 * <p>Each constant holds the string key used to look up a value in the
 * application's configuration (dataset locations, analyzer/filter classes,
 * feature-selection settings).
 *
 * <p>NOTE(review): this is the "constant interface" anti-pattern
 * (Effective Java, Item 22) — a {@code final} class with a private
 * constructor would be preferable, but changing it now would break any
 * class that {@code implements ConfigKeys}.
 */
public interface ConfigKeys {

	String DATASET_FILE_CHARSET = "dataset.file.charset";
	String DATASET_TRAIN_FILE_EXTENSION = "dataset.train.file.extension";
	// The following three keys locate the outputs produced after each user's
	// request data is processed. From the raw text the pipeline generates:
	//   terms.txt  — the term dictionary,
	//   label.txt  — the number of documents per class,
	//   train.txt  — term vectors built from the raw text, in the format
	//                "class idx1:w1 idx2:w2 ...".
	// Each session must be independent, so under the directories named by
	// these keys a per-user subfolder (keyed by a unique user identifier)
	// should be used so that different users do not interfere with each other.
	String DATASET_FEATURE_TERM_VECTOR_FILE = "dataset.feature.term.vector.file";
	String DATASET_LABEL_VECTOR_FILE = "dataset.label.vector.file";
	String DATASET_TRAIN_SVM_VECTOR_OUTPUT_DIR = "dataset.train.svm.vector.output.dir";
	// Input root directory of the training dataset: the folder holding the raw
	// natural-language text, with each class in its own subfolder.
	// NOTE(review): the constant name says *_FILE but the key value says
	// ".dir" — renaming either side would break existing callers/configs,
	// so the mismatch is only flagged here.
	String DATASET_TRAIN_INPUT_ROOT_FILE = "dataset.train.input.root.dir";
	// Name of the saved training vector file; the exact name is not important.
	String DATASET_TRAIN_SVM_VECTOR_FILE = "dataset.train.svm.vector.file";
	
	// Test-set counterparts of the training keys above.
	String DATASET_TEST_INPUT_ROOT_FILE = "dataset.test.input.root.dir";
	String DATASET_TEST_SVM_VECTOR_FILE = "dataset.test.svm.vector.file";
	String DATASET_TEST_SVM_VECTOR_OUTPUT_DIR = "dataset.test.svm.vector.output.dir";
	// Newly added key: output directory for ARFF (Weka) files.
	// Example configuration: dataset.train.outputarff.dir=/Users/zhangzhidong
	String DATASET_TRAIN_OUTPUTARFF_DIR="dataset.train.outputarff.dir";
	
	// Document analysis: analyzer implementation class and its stop-word list.
	String DOCUMENT_ANALYZER_CLASS = "document.analyzer.class";
	String DOCUMENT_ANALYZER_STOPWORDS_PATH = "document.analyzer.stopwords.path";
	// Document filtering: filter implementation classes and the lexical
	// (part-of-speech) categories to keep.
	String DOCUMENT_FILTER_CLASSES = "document.filter.classes";
	String DOCUMENT_FILTER_KEPT_LEXICAL_CATEGORIES = "document.filter.kept.lexical.categories";
	
	// Feature selection: selector implementation class and, per label, the
	// percentage of terms to keep.
	String FEATURE_VECTOR_SELECTOR_CLASS = "feature.vector.selector.class";
	String FEATURE_EACH_LABEL_KEPT_TERM_PERCENT = "feature.each.label.kept.term.percent";

}
